**THIS FILE CONSTRUCTS THE FINAL FILES NEEDED TO REPLICATE DLR'S COLUMNS (5) AND (6) OF TABLE 2 (DLR, RESTAT, 2010)
**FOR THE EXACT COUNTY-PAIRS REPLICATION AND FOR THE MULTI-STATE CZONES ALTERNATIVE
**IT CONTAINS EXTRACTS FROM DLR'S REPLICATION PACKAGE FILE DATA_SETUP.DO


* Shared path setup. The locals dlrdir, czonedir and s used throughout this
* file are expected to be defined there (that file is not shown here).
include directories_build.do


* Load DLR's county-level QCEW industry file, reading in only the
* observations with nonmissing_rest_both equal to 66. This is the estimation
* sample behind columns (5) and (6) of Table 2 (DLR, RESTAT, 2010).
* NOTE(review): 66 matches the number of quarters in periods 25 to 90, the
* window used further down when building lnMW_dif_period - confirm.
use if nonmissing_rest_both==66 using "`dlrdir'`s'QCEW_industrydata_DLR.dta"


* QCEW COUNTY-LEVEL DATA (input to both the county-pair panel and the
* czone-state aggregation below)
save "`dlrdir'`s'QCEW_industrydata_co.dta", replace


****************************************************************************
** AGGREGATE DLR'S QCEW DATA TO THE COMMUTING ZONE-STATE LEVEL
****************************************************************************

clear all

* Start from the filtered county-level extract saved earlier in this file.
use "`dlrdir'`s'QCEW_industrydata_co.dta"

* Per the original author's note, qcew_vars.do exponentiates the log variables
* (creating the exp_* variables) so that they can be summed across counties.
do "`dlrdir'`s'qcew_vars.do"

* Attach the commuting zone and state of every county. The county code is
* converted to numeric to match the key of the crosswalk; only counties found
* in both files are kept.
destring county, gen(cty_fips)
merge m:1 cty_fips using "`czonedir'`s'cty_czone_state_boundaries.dta", keep(match) nogenerate


* Collapse to one row per czone-state-quarter:
*   (sum)   levels (establishments, employment, wage bills, population, exp_*)
*   (first) attributes taken from the first county in each cell
*   (max)   the nonmiss* counters
* NOTE(review): the AWW* variables are summed like every other level; confirm
* that qcew_vars2.do rebuilds any averages that are used downstream.
collapse ///
    (sum)   estab52-empTOT countypop2000 totwage_RETAIL totwage_ACFS totwage_OTHER ///
            empACFSRETAIL AWWACFSRETAIL empLOWWAGE AWWLOWWAGE totwage52-totwageLOWWAGE ///
            emp_rest_both totwage_rest_both-AWWFIRE pop exp_* ///
    (first) st state_fips state_name event_type st_fips cntyarea censusdiv countyreal ///
            state_min-three cbmsa ///
    (max)   nonmiss*, ///
    by(czone state year quarter)


* From here on "county" and "countyreal" hold a CZONE-STATE ID rather than a
* county code; the names are kept so that DLR's code needs minimal changes.
* The ID is stored as long: the default float type represents integers exactly
* only up to 16,777,216, so an arithmetic ID must not rely on it.
drop countyreal
gen long countyreal = czone*100 + state
gen long county = countyreal

* Take logs of the summed exp_* variables (created by qcew_vars.do), then drop
* the temporary level versions.
do "`dlrdir'`s'qcew_vars2.do"
drop exp_*

* QCEW CZONE-STATE DATA
save "`dlrdir'`s'QCEW_industrydata_cz.dta", replace



************* FROM DLR'S DATA_SETUP.DO  *********************************

*******This sets up the contiguous counties main panel dataset***********

clear
set more off

* County-pair list: after the renames each row carries a pair_id and the
* county (countyreal) belonging to that pair.
insheet using "`dlrdir'`s'county-pair-list_DLR.txt", comma
rename county countyreal
rename countypair_id pair_id


* Build a quarterly skeleton: 96 copies of every pair-county row, numbered
* 1 to 96 within the pair-county, which maps to 1984q1 through 2007q4 below.
* firstob (1 on the first copy, missing otherwise) is kept because the
* original DLR code leaves it in the saved file.
expand 96
egen pair_id_county = group(pair_id countyreal)
bysort pair_id_county: gen firstob = 1 if _n == 1
by pair_id_county: gen time = _n

gen year = 1984 + floor((time-1)/4)
gen quarter = mod(time-1,4)+1


* Bring in the QCEW outcomes. The skeleton can hold the same county-quarter
* more than once (a county may belong to several pairs) while the QCEW extract
* is expected to hold one row per county-quarter, so this is an m:1 merge. It
* gives the same result as the former m:m merge whenever that holds, and stops
* with an error instead of silently mis-matching rows if it does not.
* keep(master match) drops QCEW records for counties that are not part of a
* county pair. merge sorts on the key itself, so no prior sort is needed.
merge m:1 year quarter countyreal using "`dlrdir'`s'QCEW_industrydata_co.dta", keep(master match) nogenerate

rename time period

* drop if year<1990

* Fill state_fips on skeleton rows that found no QCEW record, using the value
* observed for the same county in other quarters.
egen state_fips_all = max(state_fips), by(countyreal)
replace state_fips = state_fips_all
drop state_fips_all


**recoding san francisco state code. IRRELEVANT FOR MULTI-STATE CZONES
* San Francisco (6075) gets the pseudo state code 99 so that it can carry its
* own minimum wage further down.
replace state_fips=99 if countyreal==6075

*yellowstone ...
replace state_fips = 30 if countyreal==30113


* Attach the state-by-quarter minimum wage series. Unmatched rows from both
* sides are kept, exactly as the former old-syntax merge did; m:1 additionally
* checks that the minimum wage file has one row per state-quarter.
merge m:1 state_fips year quarter using "`dlrdir'`s'MW_yr_qtr_84_07_DLR.dta", nogenerate

**** fix SF minwage IRRELEVANT FOR MULTI-STATE CZONES ***
* Rows recoded to state 99 first take California's minimum wage for the same
* quarter, then the hard-coded values below for 2004 to 2007.

gen CA = (state_fips==6)
egen CAminwage = max(minwage*CA)  , by(year quarter)

replace minwage = CAminwage if state_fips==99

replace minwage = 8.50 if state_fips==99 & year==2004
replace minwage = 8.62 if state_fips==99 & year==2005
replace minwage = 8.82 if state_fips==99 & year==2006
replace minwage = 9.15 if state_fips==99 & year==2007


*****************
gen lnMW = ln(minwage)

rename st_mw stminwage
rename fed_mw federalmin

*****************

gen all=1
gen event = (event_type<3)


* Pair-by-period identifier.
egen pair_id_period = group(pair_id period)

********

* Pair-level summaries:
*   nonmissing_both_pair  smallest nonmissing_rest_both within the pair
*   lnMW_min_pairperiod and lnMW_max_pairperiod  range of lnMW within a pair-period
*   lnMW_dif_period  1 when the range is non-zero and period is in 25 to 90
*                    (1990q1 to 2006q2 given the year and quarter mapping above)
*   lnMW_dif         1 when the pair has such a difference in any period
*   lnMW_gap_pair    size of the within-pair-period log minimum wage gap
egen nonmissing_both_pair = min(nonmissing_rest_both), by(pair_id)
egen lnMW_min_pairperiod = min(lnMW) , by(pair_id_period)
egen lnMW_max_pairperiod = max(lnMW) , by(pair_id_period)
gen lnMW_dif_period = (lnMW_min_pairperiod != lnMW_max_pairperiod) & ( period>=25 & period<=90)
egen lnMW_dif = max(lnMW_dif_period), by(pair_id)


gen lnMW_gap_pair = lnMW_max_pairperiod - lnMW_min_pairperiod

gen lnpop =ln(pop)

*CONTIGUOUS COUNTIES DATA
save "`dlrdir'`s'QCEWindustry_minwage_contig_co.dta", replace


************* FROM DLR'S DATA_SETUP.DO (NOW FOR MULTI-STATE CZONES) ***********

*******This sets up the multi-state czone main panel dataset*******************

clear
set more off

*CZONE-STATE PAIRS (I KEEP USING THE "COUNTY" NAMES TO AVOID ALTERING DLR CODE)
* After the renames each row carries a pair_id and the czone-state ID
* (countyreal) belonging to that pair.
use "`dlrdir'`s'czone_pairs_dlrrep_final.dta", clear
rename county countyreal
rename countypair_id pair_id


* Build a quarterly skeleton: 96 copies of every pair-unit row, numbered
* 1 to 96 within the pair-unit, which maps to 1984q1 through 2007q4 below.
* firstob (1 on the first copy, missing otherwise) is kept because the
* original DLR code leaves it in the saved file.
expand 96
egen pair_id_county = group(pair_id countyreal)
bysort pair_id_county: gen firstob = 1 if _n == 1
by pair_id_county: gen time = _n

gen year = 1984 + floor((time-1)/4)
gen quarter = mod(time-1,4)+1


* Bring in the czone-state QCEW outcomes. The skeleton can hold the same
* unit-quarter more than once (a unit may belong to several pairs) while the
* aggregated QCEW file is expected to hold one row per czone-state-quarter, so
* this is an m:1 merge. It gives the same result as the former m:m merge
* whenever that holds, and stops with an error instead of silently
* mis-matching rows if it does not.
* keep(master match) drops QCEW records for czone-state units that are not
* part of a pair. merge sorts on the key itself, so no prior sort is needed.
merge m:1 year quarter countyreal using "`dlrdir'`s'QCEW_industrydata_cz.dta", keep(master match) nogenerate

rename time period

* drop if year<1990

* Fill state_fips on skeleton rows that found no QCEW record, using the value
* observed for the same unit in other quarters.
egen state_fips_all = max(state_fips), by(countyreal)
replace state_fips = state_fips_all
drop state_fips_all


* DLR's county-specific recodes are deliberately NOT applied here, because
* countyreal is a czone-state ID in this panel, not a county code:
*   - San Francisco: state_fips=99 if countyreal==6075
*   - Yellowstone:   state_fips=30 if countyreal==30113
* For the same reason the San Francisco minimum wage override (California's
* minimum wage, then 8.50, 8.62, 8.82 and 9.15 for 2004 to 2007) is omitted,
* and the helper variables CA and CAminwage are not created.


* Attach the state-by-quarter minimum wage series. Unmatched rows from both
* sides are kept, exactly as the former old-syntax merge did; m:1 additionally
* checks that the minimum wage file has one row per state-quarter.
merge m:1 state_fips year quarter using "`dlrdir'`s'MW_yr_qtr_84_07_DLR.dta", nogenerate


*****************
gen lnMW = ln(minwage)

rename st_mw stminwage
rename fed_mw federalmin

*****************

gen all=1
gen event = (event_type<3)


* Pair-by-period identifier.
egen pair_id_period = group(pair_id period)

********

* Pair-level summaries:
*   nonmissing_both_pair  smallest nonmissing_rest_both within the pair
*   lnMW_min_pairperiod and lnMW_max_pairperiod  range of lnMW within a pair-period
*   lnMW_dif_period  1 when the range is non-zero and period is in 25 to 90
*                    (1990q1 to 2006q2 given the year and quarter mapping above)
*   lnMW_dif         1 when the pair has such a difference in any period
*   lnMW_gap_pair    size of the within-pair-period log minimum wage gap
egen nonmissing_both_pair = min(nonmissing_rest_both), by(pair_id)
egen lnMW_min_pairperiod = min(lnMW) , by(pair_id_period)
egen lnMW_max_pairperiod = max(lnMW) , by(pair_id_period)
gen lnMW_dif_period = (lnMW_min_pairperiod != lnMW_max_pairperiod) & ( period>=25 & period<=90)
egen lnMW_dif = max(lnMW_dif_period), by(pair_id)


gen lnMW_gap_pair = lnMW_max_pairperiod - lnMW_min_pairperiod

gen lnpop =ln(pop)

*CONTIGUOUS MULTI-STATE DATA
save "`dlrdir'`s'QCEWindustry_minwage_contig_cz.dta", replace

